LINUX_SERIES = 2.6
-LINUX_VER = 2.6.16
+LINUX_VER = 2.6.16.13
EXTRAVERSION ?= xen
#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
+#include <linux/audit.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss;
#endif
+ long eax;
/*
* make sure the vm86() system call doesn't try to do anything silly
*/
tsk->thread.screen_bitmap = info->screen_bitmap;
if (info->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
+ __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t");
+ __asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax));
+
+ /*call audit_syscall_exit since we do not exit via the normal paths */
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
+
__asm__ __volatile__(
- "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
"movl %0,%%esp\n\t"
"movl %1,%%ebp\n\t"
"jmp resume_userspace"
: /* no outputs */
- :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax");
+ :"r" (&info->regs), "r" (task_thread_info(tsk)));
/* we never return here */
}
$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
$(call if_changed,syscall)
-AFLAGS_vsyscall-sysenter.o = -m32 -Iarch/i386/kernel
-AFLAGS_vsyscall-syscall.o = -m32 -Iarch/i386/kernel
+AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel
+AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel
ifdef CONFIG_XEN
-AFLAGS_vsyscall-int80.o = -m32 -Iarch/i386/kernel
+AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel
CFLAGS_syscall32-xen.o += -DUSE_INT80
AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80
*
* XXX if we had a free scratch register we could save the RSP into the stack frame
* and report it properly in ps. Unfortunately we haven't.
+ *
+ * When user can change the frames always force IRET. That is because
+ * it deals with uncanonical addresses better. SYSRET has trouble
+ * with them due to bugs in both AMD and Intel CPUs.
*/
ENTRY(system_call)
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
1: movl $_TIF_NEED_RESCHED,%edi
- jmp sysret_check
+ /* Use IRET because user could have changed frame. This
+ works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
+ cli
+ jmp int_with_check
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
call syscall_trace_leave
RESTORE_TOP_OF_STACK %rbx
RESTORE_REST
- jmp ret_from_sys_call
+ /* Use IRET because user could have changed frame */
+ jmp int_ret_from_sys_call
CFI_ENDPROC
/*
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call sys_execve
- GET_THREAD_INFO(%rcx)
- bt $TIF_IA32,threadinfo_flags(%rcx)
- CFI_REMEMBER_STATE
- jc exec_32bit
RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
-
-exec_32bit:
- CFI_RESTORE_STATE
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
* This is basically '__unlazy_fpu', except that we queue a
* multicall to indicate FPU task switch, rather than
* synchronously trapping to Xen.
+ * This must be here to ensure both math_state_restore() and
+ * kernel_fpu_begin() work consistently.
+ * The AMD workaround requires it to be after DS reload, or
+ * after DS has been cleared, which we do in __prepare_arch_switch.
*/
if (prev_p->thread_info->status & TS_USEDFPU) {
__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ /* Enable workaround for FXSAVE leak */
+ if (c->x86 >= 6)
+ set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+
r = get_model_name(c);
if (!r) {
switch (c->x86) {
}
task_lock(p);
if (p->files) {
- rcu_read_lock();
+ /*
+ * We don't take a ref to the file, so we must
+ * hold ->file_lock instead.
+ */
+ spin_lock(&p->files->file_lock);
fdt = files_fdtable(p->files);
for (i=0; i < fdt->max_fds; i++) {
filp = fcheck_files(p->files, i);
break;
}
}
- rcu_read_unlock();
+ spin_unlock(&p->files->file_lock);
}
task_unlock(p);
} while_each_task_pid(session, PIDTYPE_SID, p);
#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
+#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+
#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
pmd_index(address))
+/*
+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
+ * entry, so clear the bottom half first and enforce ordering with a compiler
+ * barrier.
+ */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ ptep->pte_low = 0;
+ smp_wmb();
+ ptep->pte_high = 0;
+}
+
+static inline void pmd_clear(pmd_t *pmd)
+{
+ u32 *tmp = (u32 *)pmd;
+ *tmp = 0;
+ smp_wmb();
+ *(tmp + 1) = 0;
+}
+
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
pte_t res;
extern unsigned long pg0[];
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
#define pmd_none(x) (!(unsigned long)pmd_val(x))
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
can temporarily clear it. */
#define pmd_present(x) (pmd_val(x))
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
pte_t pte;
if (full) {
pte = *ptep;
-#ifdef CONFIG_X86_PAE
- /* Cannot do this in a single step, as the compiler may
- issue the two stores in either order, but the hypervisor
- must not see the high part before the low one. */
- ptep->pte_low = 0;
- barrier();
- ptep->pte_high = 0;
-#else
- *ptep = __pte(0);
-#endif
+ pte_clear(mm, addr, ptep);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
- * swp_entry_t if PageSwapCache.
- * When page is free, this
+ * swp_entry_t if PageSwapCache;
* indicates order in the buddy
- * system.
+ * system if PG_buddy is set.
*/
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
- 1 << PG_writeback );
+ 1 << PG_writeback |
+ 1 << PG_buddy );
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
static inline void set_page_order(struct page *page, int order) {
set_page_private(page, order);
- __SetPagePrivate(page);
+ __SetPageBuddy(page);
}
static inline void rmv_page_order(struct page *page)
{
- __ClearPagePrivate(page);
+ __ClearPageBuddy(page);
set_page_private(page, 0);
}
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
* (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
*
+ * For recording page's order, we use page_private(page).
*/
static inline int page_is_buddy(struct page *page, int order)
{
return 0;
#endif
- if (PagePrivate(page) &&
- (page_order(page) == order) &&
- page_count(page) == 0)
+ if (PageBuddy(page) && page_order(page) == order) {
+ BUG_ON(page_count(page) != 0);
return 1;
+ }
return 0;
}
* as necessary, plus some accounting needed to play nicely with other
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
* order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
/*
switch(dev->reg_state) {
case NETREG_REGISTERING:
+ dev->reg_state = NETREG_REGISTERED;
err = netdev_register_sysfs(dev);
if (err)
printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
dev->name, err);
- dev->reg_state = NETREG_REGISTERED;
break;
case NETREG_UNREGISTERING: